library(here)

library(data.table)
library(fixest)
library(ivDiag)
library(sensemakr)
library(modelsummary)


# Load data -------------------------------------------------------------------------

ml_data <- fread(here('data', 'full_rol.csv'))

## Contract-intensive trade ---------------------------------------------------------

fullData <- data.table(year = integer(),
                       exporter = integer(),
                       country = integer(),
                       corr = numeric())

for (yr in 1996:2020) {
  temp <- fread(here('data', 'trade', 'n', paste0(yr, '.csv')))
  temp$year <- yr
  
  fullData <- rbind(fullData, temp)
}
rm(temp, yr)

fullData <- fullData[exporter != country]

countrycodes <- fread(here('data', 'trade','country_codes_V202201.csv'))

fullData <- merge(fullData, countrycodes[ , .(exporter = country_code,
                                              exporter_iso3c = iso_3digit_alpha)],
                  by = 'exporter',
                  all.x = TRUE)

fullData <- merge(fullData, countrycodes[ , .(country = country_code,
                                              country_iso3c = iso_3digit_alpha)],
                  by = 'country',
                  all.x = TRUE)

fullData <- fullData[ , .(exporter, exporter_iso3c, 
                          country, country_iso3c, 
                          year, corr)]
setorder(fullData, exporter, country)


### Merge with ML data --------------------------------------------------------------

fullData <- merge(fullData, ml_data[ , .(country_iso3c = iso3c, year, 
                                         country_ml = ml)],
                  by = c('country_iso3c', 'year'),
                  all.x = TRUE)

fullData <- fullData[!is.na(fullData$country_ml)]

fullData[ corr < 0 , corr := 0]
fullData[ , corr_sum := sum(corr, na.rm = T), by = .(exporter, year)]
fullData[ , weight := (corr / corr_sum) * country_ml, by = .(exporter, year)]

country_comp <- fullData[ , .(comp_ml_contract = sum(weight, na.rm = T),
                              comp_total_contract = sum(corr_sum, na.rm = T)), 
                          by = .(exporter_iso3c, year)]


ml_data <- merge(ml_data, country_comp[ , .(iso3c = exporter_iso3c, year, 
                                            comp_ml_contract, 
                                            comp_total_contract)],
                 all.x = TRUE)

rm(country_comp, countrycodes, fullData)

## Non-contract intensive trade -----------------------------------------------------

fullData <- data.table(year = integer(),
                       exporter = integer(),
                       country = integer(),
                       corr = numeric())

for (yr in 1996:2020) {
  temp <- fread(here('data', 'trade', 'rw', paste0(yr, '.csv')))
  temp$year <- yr
  
  fullData <- rbind(fullData, temp)
}
rm(temp, yr)

fullData <- fullData[exporter != country]

countrycodes <- fread(here('data', 'trade','country_codes_V202201.csv'))

fullData <- merge(fullData, countrycodes[ , .(exporter = country_code,
                                              exporter_iso3c = iso_3digit_alpha)],
                  by = 'exporter',
                  all.x = TRUE)

fullData <- merge(fullData, countrycodes[ , .(country = country_code,
                                              country_iso3c = iso_3digit_alpha)],
                  by = 'country',
                  all.x = TRUE)

fullData <- fullData[ , .(exporter, exporter_iso3c, 
                          country, country_iso3c, 
                          year, corr)]
setorder(fullData, exporter, country)


### Merge with ML data ---------------------------------------------------------------

fullData <- merge(fullData, ml_data[ , .(country_iso3c = iso3c, year, 
                                         country_ml = ml)],
                  by = c('country_iso3c', 'year'),
                  all.x = TRUE)

fullData <- fullData[!is.na(fullData$country_ml)]

fullData[ corr < 0 , corr := 0]
fullData[ , corr_sum := sum(corr, na.rm = T) , by = .(exporter, year)]
fullData[ , weight := (corr / corr_sum) * country_ml, by = .(exporter, year)]

country_comp <- fullData[ , .(comp_ml_non_contract = sum(weight, na.rm = T)), 
                          by = .(exporter_iso3c, year)]

ml_data <- merge(ml_data, country_comp[ , .(iso3c = exporter_iso3c, year, 
                                            comp_ml_non_contract)],
                 all.x = TRUE)

rm(country_comp, countrycodes, fullData)

# Save for use in Stata to estimate LIML and AR confidence sets
haven::write_dta(ml_data, here('data/full_iv.dta'))

# Main Tables -----------------------------------------------------------------------

## Table 1 and A6 -------------------------------------------------------------------

m_con_n <- feols(v2x_rule ~  csw0(l(nyc), 
                                  l(bits_ln),
                                  l(fdi_stock_ln),
                                  l(trade_ln)  ,
                                  l(gdppc_ln), 
                                  l(market_ln), 
                                  l(growth)) | iso3c + year | ml ~ l(scale(comp_ml_contract)),
                 panel.id = ~ iso3c + year,
                 data = ml_data[(ml_year > 1996 | is.na(ml_year))])

# Generate effective F-stats
vars <- c('comp_ml_contract', 'nyc', 'bits_ln',
          'trade_ln', 'fdi_stock_ln',
          'gdppc_ln', 'market_ln', 'growth')

ml_data[ , paste0(vars, '_l1') := shift(.SD, 1L, NA, 'lag'), 
         .SDcols = vars, 
         by = iso3c]

F_eff <- numeric()
F_eff <- c(F_eff, 
           eff_F(Y    = 'v2x_rule',
                 D    = 'ml',
                 Z    = 'comp_ml_contract_l1',
                 FE   = c('iso3c', 'year'),
                 cl   = 'iso3c',
                 data = ml_data[ml_year > 1996 | is.na(ml_year)])
)

for (v in c(3, 5, 8)) {
  F_eff <- c(F_eff, 
             eff_F(Y    = 'v2x_rule',
                   D    = 'ml',
                   Z    = 'comp_ml_contract_l1',
                   controls = paste0(vars[2:v], '_l1'),
                   FE   = c('iso3c', 'year'),
                   cl   = 'iso3c',
                   data = ml_data[ml_year > 1996 | is.na(ml_year)]
             ) ) # end append
}

F_eff <- round(F_eff, 2)

## Print table ----------------------------------------------------------------------

coefNames <- c('fit_ml' = 'Model Law',
               'l(scale(comp_ml_contract), 1)' = 'Export Competition',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

models_con_n <- list(m_con_n[[1]], m_con_n[[3]],
                     m_con_n[[5]], m_con_n[[8]])

rows <- matrix(c('Effective F-stat', F_eff,
                 'Wu-Hausman', sapply(models_con_n, function (x) round(x$iv_wh$stat, 2)),
                 'Wu-Hausman p', sapply(models_con_n, function (x) round(x$iv_wh$p, 2))), 
               nrow = 3, byrow = T)

rows <- as.data.frame(rows)

models_n_fs <- list(m_con_n[[1]]$iv_first_stage$ml, m_con_n[[3]]$iv_first_stage$ml,
                    m_con_n[[5]]$iv_first_stage$ml, m_con_n[[8]]$iv_first_stage$ml)

# This produces Table A6. Table 1 simply drops the controls and adds the 
#   Anderson-Rubin confidence sets (estimated in analyses_iv.do)
modelsummary(list(models_con_n, models_n_fs),
             shape = 'rbind',
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             add_rows = rows,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|FE|R2$',
             escape = FALSE,
             output = here('output', 'tables', 'table-A6.tex')
)


## Table 2 --------------------------------------------------------------------------

m_con_n_rf <- feols(v2x_rule ~  csw0(l(scale(comp_ml_contract)),
                                     l(nyc),
                                     l(bits_ln),
                                     l(fdi_stock_ln),
                                     l(trade_ln)  ,
                                     l(gdppc_ln), 
                                     l(market_ln), 
                                     l(growth)) | iso3c + year,
                    panel.id = ~ iso3c + year,
                    data = ml_data[ml_year > 1996 | is.na(ml_year)])

models_n_rf <- list(m_con_n_rf[[2]], m_con_n_rf[[4]],
                    m_con_n_rf[[6]], m_con_n_rf[[9]])

coefNames <- c('l(scale(comp_ml_contract), 1)' = 'Export Competition\\textsubscript{Diff.}',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

get_q_vals <- function (m) {
  ovb_rf <- sensemakr(m,
                      method = 'feols',
                      treatment = 'l(scale(comp_ml_contract), 1)')
  return(scales::percent(unlist(ovb_rf$sensitivity_stats[c('r2yd.x', 'rv_q', 'rv_qa')])))
}

rows <- data.frame(ovb_q = c('$R^2_{Y \\sim D |{\\bf X}}$',
                             '$RV_{q = 1}$',
                             '$RV_{q = 1, \\alpha = 0.05}$'))

rows <- cbind(rows,
              sapply(models_n_rf, get_q_vals))

# This produces Table A7. Table 2 simply drops the controls

modelsummary(models_n_rf,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|FE|R2$',
             add_rows = rows,
             escape = TRUE,
             title = 'Competition in simple products, Reduced form estimates',
             output = here('output', 'tables', 'table-A7.tex')
)

rm(get_q_vals)

# Table A9 --------------------------------------------------------------------------

m_non_con_2sls <- feols(v2x_rule ~  csw0(l(nyc), 
                                         l(bits_ln),
                                         l(fdi_stock_ln),
                                         l(trade_ln)  ,
                                         l(gdppc_ln), 
                                         l(market_ln), 
                                         l(growth)) | iso3c + year | ml ~ l(scale(comp_ml_non_contract)),
                        panel.id = ~ iso3c + year,
                        data = ml_data[(ml_year > 1996 | is.na(ml_year))])

# Generate effective F-stats
vars <- c('comp_ml_non_contract', 'nyc', 'bits_ln',
          'trade_ln', 'fdi_stock_ln',
          'gdppc_ln', 'market_ln', 'growth')

ml_data[ , paste0(vars, '_l1') := shift(.SD, 1L, NA, 'lag'), 
         .SDcols = vars, 
         by = iso3c]

F_eff <- numeric()
F_eff <- c(F_eff, 
           eff_F(Y    = 'v2x_rule',
                 D    = 'ml',
                 Z    = 'comp_ml_non_contract_l1',
                 FE   = c('iso3c', 'year'),
                 cl   = 'iso3c',
                 data = ml_data[ml_year > 1996 | is.na(ml_year)])
)

for (v in c(3, 5, 8)) {
  F_eff <- c(F_eff, 
             eff_F(Y    = 'v2x_rule',
                   D    = 'ml',
                   Z    = 'comp_ml_non_contract_l1',
                   controls = paste0(vars[2:v], '_l1'),
                   FE   = c('iso3c', 'year'),
                   cl   = 'iso3c',
                   data = ml_data[ml_year > 1996 | is.na(ml_year)]
             ) ) # end append
}

F_eff <- round(F_eff, 2)

non_con_models <- list('Second stage' = 
                         list(m_non_con_2sls[[1]],
                              m_non_con_2sls[[3]],
                              m_non_con_2sls[[5]],
                              m_non_con_2sls[[8]]),
                       'First stage' = 
                         list(m_non_con_2sls[[1]]$iv_first_stage$ml,
                              m_non_con_2sls[[3]]$iv_first_stage$ml,
                              m_non_con_2sls[[5]]$iv_first_stage$ml,
                              m_non_con_2sls[[8]]$iv_first_stage$ml)
)

coefNames <- c('fit_ml' = 'Model Law',
               'l(scale(comp_ml_non_contract), 1)' = 'Export Competition\\textsubscript{Undiff.}',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

rows <- matrix(c('Year \\& Unit FE',    '\\cmark',   '\\cmark',   '\\cmark',   '\\cmark',
                 'Effective F-stat', F_eff),
               ncol = 5, byrow = TRUE)
rows <- as.data.frame(rows)

modelsummary(non_con_models,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             shape = 'rbind',
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|FE|R2$',
             add_rows = rows,
             escape = TRUE,
             title = '2SLS estimates. Export competition in non-contract intensive products does 
                        not predict Model Law enactment',
             output = here('output', 'tables', 'table-A9.tex')
)

# Table A10 -------------------------------------------------------------------------

m_non_con_rf <- feols(v2x_rule ~  csw0(l(scale(comp_ml_non_contract)),
                                       l(nyc),
                                       l(bits_ln),
                                       l(fdi_stock_ln),
                                       l(trade_ln)  ,
                                       l(gdppc_ln), 
                                       l(market_ln), 
                                       l(growth)) | iso3c + year,
                      panel.id = ~ iso3c + year,
                      data = ml_data[ml_year > 1996 | is.na(ml_year)])

models_non_con_rf <- list(m_non_con_rf[[2]], m_non_con_rf[[4]],
                          m_non_con_rf[[6]], m_non_con_rf[[9]])

coefNames <- c('l(scale(comp_ml_non_contract), 1)' = 'Export Competition\\textsubscript{Undiff.}',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

rows <- matrix(c('Year \\& Unit FE',    '\\cmark',   '\\cmark',   '\\cmark',   '\\cmark'),
               ncol = 5, byrow = TRUE)
rows <- as.data.frame(rows)

modelsummary(models_non_con_rf,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|FE|R2$',
             add_rows = rows,
             escape = TRUE,
             title = 'Reduced-form estimates. Export competition in non-contract intensive products is 
                        uncorrelated with change in quality of domestic legal institutions.',
             output = here('output', 'tables', 'table-A10.tex')
)


# Table A11 --------------------------------------------------------------------------

m_con_n_bits <- feols(bits_ln ~  csw0(l(scale(comp_ml_contract)),
                                      l(nyc),
                                      l(fdi_stock_ln),
                                      l(trade_ln)  ,
                                      l(gdppc_ln), 
                                      l(market_ln), 
                                      l(growth)) | iso3c + year,
                      panel.id = ~ iso3c + year,
                      data = ml_data[ml_year > 1996 | is.na(ml_year)])

rows <- matrix(c('Year \\& Unit FE',    '\\cmark',   '\\cmark',   '\\cmark',   '\\cmark'),
               ncol = 5, byrow = TRUE)
rows <- as.data.frame(rows)

coefNames <- c('l(scale(comp_ml_contract), 1)' = 'Export Competition\\textsubscript{Diff}',
               'l(nyc, 1)' = 'NYC',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

m_con_n_bits <- m_con_n_bits[c(2, 3, 5, 8)]
names(m_con_n_bits) <- paste0('(', 1:4, ')')
modelsummary(m_con_n_bits,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|R2$|FE',
             add_rows = rows,
             escape = FALSE,
             output = here('output', 'tables', 'table-A11.tex'))

# Tables A12  ----------------------------------------------------------------------

m_con_total_fs <- feols(v2x_rule ~  csw0(l(nyc),
                                         l(bits_ln),
                                         l(fdi_stock_ln),
                                         l(trade_ln),
                                         l(gdppc_ln), 
                                         l(market_ln), 
                                         l(growth)) | iso3c + year | ml ~ l(scale(comp_total_contract)),
                        panel.id = ~ iso3c + year,
                        data = ml_data[(ml_year > 1996 | is.na(ml_year))])

con_total_models <- list(m_con_total_fs[[1]]$iv_first_stage$ml,
                         m_con_total_fs[[3]]$iv_first_stage$ml,
                         m_con_total_fs[[5]]$iv_first_stage$ml,
                         m_con_total_fs[[8]]$iv_first_stage$ml)

coefNames <- c('l(scale(comp_total_contract), 1)' = 'Export Competition\\textsubscript{Total Diff.}',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

rows <- matrix(c('Year \\& Unit FE',    '\\cmark',   '\\cmark',   '\\cmark',   '\\cmark'),
               ncol = 5, byrow = TRUE)
rows <- as.data.frame(rows)

modelsummary(con_total_models,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|R2$|FE',
             add_rows = rows,
             escape = FALSE,
             output = here('output', 'tables', 'table-A12.tex'))


# Table A13 -------------------------------------------------------------------------

m_con_total_rf <- feols(v2x_rule ~  csw0(l(scale(comp_total_contract)),
                                     l(nyc), 
                                     l(bits_ln),
                                     l(fdi_stock_ln),
                                     l(trade_ln),
                                     l(gdppc_ln), 
                                     l(market_ln), 
                                     l(growth)) | iso3c + year,
                        panel.id = ~ iso3c + year,
                        data = ml_data[(ml_year > 1996 | is.na(ml_year))])

con_total_models_rf <- list(m_con_total_rf[[2]],
                            m_con_total_rf[[4]],
                            m_con_total_rf[[6]],
                            m_con_total_rf[[9]])

coefNames <- c('l(scale(comp_total_contract), 1)' = 'Export Competition\\textsubscript{Total Diff.}',
               'l(nyc, 1)' = 'NYC',
               'l(bits_ln, 1)' = 'log BITs',
               'l(fdi_stock_ln, 1)' = 'log FDI Stock',
               'l(trade_ln, 1)' = 'log Trade Dep.',
               'l(gdppc_ln, 1)' = 'log GDP per cap.', 
               'l(market_ln, 1)' = 'log GDP', 
               'l(growth, 1)' = 'Growth')

rows <- matrix(c('Year \\& Unit FE',    '\\cmark',   '\\cmark',   '\\cmark',   '\\cmark'),
               ncol = 5, byrow = TRUE)
rows <- as.data.frame(rows)


modelsummary(con_total_models_rf,
             stars = c('***' = .01, '**' = .05, '*' = .1),
             coef_map = coefNames,
             gof_omit = 'DF|Deviance|With|AIC|BIC|RMSE|Std.|R2$|FE',
             add_rows = rows,
             escape = FALSE,
             output = here('output', 'tables', 'table-A13.tex'))
